library(tidyverse)
library(here)
library(plotly)
# Use ggplot2's black-and-white theme as the default for all plots below.
theme_set(theme_bw())

# Read the track table. Any column not listed explicitly is parsed as double.
data <- read_csv(
  here::here("data/jackson.csv"),
  col_types = cols(
    # Text columns
    album_uri = col_character(),
    album_name = col_character(),
    album_img = col_character(),
    album_release_date = col_character(),
    track_name = col_character(),
    track_uri = col_character(),
    key = col_character(),
    mode = col_character(),
    key_mode = col_character(),
    # Date column (an empty format string is passed to col_date())
    album_release_year = col_date(format = ""),
    # Integer columns
    album_popularity = col_integer(),
    time_signature = col_integer(),
    track_popularity = col_integer(),
    # Fallback for everything else
    .default = col_double()
  )
)

# Print a compact, column-by-column overview of what was loaded.
glimpse(data)
Observations: 500
Variables: 23
$ album_uri <chr> "5T9tTjPIfjbUJGRJdYOOLl", "5T9tTjPIfjbUJGRJdYOOLl", "5T9tTjPIfjbU...
$ album_name <chr> "Jackson Do Pandeiro Volume 1: Tum, Tum, Tum!", "Jackson Do Pande...
$ album_img <chr> "https://i.scdn.co/image/5dcc4a0cad740f1ee0774196d0a14f3693ef8879...
$ album_release_date <chr> "1958-11-11", "1958-11-11", "1958-11-11", "1958-11-11", "1958-11-...
$ album_release_year <date> 1958-11-11, 1958-11-11, 1958-11-11, 1958-11-11, 1958-11-11, 1958...
$ album_popularity <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ track_name <chr> "Tum, Tum, Tum", "Pacífico Pacato", "Nortista Quatrocentão", "Sem...
$ track_uri <chr> "6cCYhV6fU68uzbjWPG9V7x", "6Gu7y9SgtVTGh8YGhDPtCe", "1hq7M7cJtvDg...
$ danceability <dbl> 0.501, 0.663, 0.550, 0.447, 0.544, 0.571, 0.495, 0.572, 0.500, 0....
$ energy <dbl> 0.987, 0.962, 0.947, 0.969, 0.972, 0.926, 0.967, 0.986, 0.947, 0....
$ key <chr> "A", "F", "D", "G", "E", "F", "E", "C", "F", "A#", "E", "F", "D#"...
$ loudness <dbl> 2.561, 1.137, 1.621, 2.743, 2.513, 2.414, 2.375, 2.597, 3.078, 3....
$ mode <chr> "major", "major", "major", "major", "minor", "major", "minor", "m...
$ speechiness <dbl> 0.0429, 0.1810, 0.0469, 0.0549, 0.0502, 0.0344, 0.0576, 0.0367, 0...
$ acousticness <dbl> 0.718, 0.738, 0.666, 0.759, 0.787, 0.651, 0.712, 0.194, 0.286, 0....
$ instrumentalness <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ liveness <dbl> 0.282, 0.200, 0.251, 0.333, 0.176, 0.342, 0.321, 0.301, 0.323, 0....
$ valence <dbl> 0.963, 0.961, 0.923, 0.899, 0.783, 0.961, 0.755, 0.989, 0.957, 0....
$ tempo <dbl> 101.676, 113.562, 116.125, 116.023, 112.863, 133.065, 117.822, 10...
$ duration_ms <dbl> 158133, 139773, 163173, 143733, 151653, 157480, 158133, 154680, 1...
$ time_signature <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,...
$ key_mode <chr> "A major", "F major", "D major", "G major", "E minor", "F major",...
$ track_popularity <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
# Q-Q plot of danceability against stat_qq()'s default theoretical
# distribution.
ggplot(data, aes(sample = danceability)) +
  stat_qq()

# Histogram of danceability on the density scale, using 0.05-wide bins.
ggplot(data, aes(x = danceability, y = ..density..)) +
  geom_histogram(
    binwidth = 0.05,
    colour = "black",
    fill = "grey"
  )
# Q-Q plot of speechiness against stat_qq()'s default theoretical
# distribution.
ggplot(data, aes(sample = speechiness)) +
  stat_qq()

# Histogram of speechiness on the density scale, using 0.1-wide bins.
ggplot(data, aes(x = speechiness, y = ..density..)) +
  geom_histogram(
    binwidth = 0.1,
    colour = "black",
    fill = "grey"
  )
# Add the track length in seconds, derived from the millisecond column.
data <- mutate(data, duration_s = duration_ms / 1000)

# Show only the new column to confirm the conversion.
glimpse(select(data, duration_s))
Observations: 500
Variables: 1
$ duration_s <dbl> 158.133, 139.773, 163.173, 143.733, 151.653, 157.480, 158.133, 154.680, 1...
# Q-Q plot of track duration (seconds) against stat_qq()'s default
# theoretical distribution.
ggplot(data, aes(sample = duration_s)) +
  stat_qq()

# Histogram of track duration on the density scale; each bin spans 60 seconds.
ggplot(data, aes(x = duration_s, y = ..density..)) +
  geom_histogram(
    binwidth = 60,
    colour = "black",
    fill = "grey"
  )
# Interactive scatter plot of speechiness vs. danceability, one point per
# track name.
#
# The previous `group_by(track_name) %>% unique()` did not deduplicate by
# track: unique() on a data frame only drops rows that are identical in every
# column, and the grouping does not change that. distinct() with
# `.keep_all = TRUE` keeps the first row found for each track name together
# with all of its other columns.
p <- data %>%
  distinct(track_name, .keep_all = TRUE) %>%
  ggplot(aes(x = speechiness,
             y = danceability)) +
  geom_point(alpha = 0.4)

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
# 2D density of speechiness vs. danceability, one observation per track name.
#
# The previous `group_by(track_name) %>% unique()` did not deduplicate by
# track: unique() on a data frame only drops rows that are identical in every
# column, and the grouping does not change that. distinct() with
# `.keep_all = TRUE` keeps the first row found for each track name, so a track
# name that occurs in several rows is not counted more than once in the
# density estimate.
data %>%
  distinct(track_name, .keep_all = TRUE) %>%
  ggplot(aes(x = speechiness, y = danceability)) +
  # Filled contour polygons, shaded by the estimated density level.
  stat_density2d(aes(fill = ..level..), geom = "polygon")
Último álbum: “1981: Isso é que é Forró!”
# Flag rows whose album release date falls after 1981 as `remaster`.
#
# `album_release_date` is a character column, so `>` compares strings
# lexicographically. That matches chronological order only when both sides
# are ISO "YYYY-MM-DD" strings. The previous cutoff, "1981-30-12", had day and
# month swapped; it only worked because "30" sorts after every real month,
# which made it behave like "end of 1981". The cutoff is now written as the
# valid ISO date with that same effect.
data <- data %>%
  mutate(remaster = album_release_date > "1981-12-31")

# Box plot of track duration per release, restricted to rows at or before the
# cutoff. The flag is reused so the cutoff is defined in one place only;
# filter() drops rows where the flag is NA, as the original comparison did.
data %>%
  filter(!remaster) %>%
  ggplot(aes(x = album_release_year,
             y = duration_s,
             group = album_release_year)) +
  geom_boxplot()
# Track duration per release (all rows), coloured by the `remaster` flag:
# individual tracks as semi-transparent points with a box plot drawn on top.
data %>%
  ggplot(aes(x = as.factor(album_release_year),
             y = duration_s,
             group = album_release_year,
             color = remaster)) +
  # Passing `position = position_dodge()` to geom_jitter() replaced its jitter
  # position, so the points were not jittered at all. The dodge itself had
  # nothing to separate, because `group` is the same variable as x (one group
  # per x position). Jitter horizontally only, so the plotted durations stay
  # exact.
  geom_jitter(width = 0.2, height = 0, alpha = 0.3) +
  # Outliers are hidden here because every track is already drawn as a point.
  geom_boxplot(outlier.colour = NA, position = "dodge", alpha = 0.6) +
  # Rotate the x-axis labels so the release labels do not overlap.
  theme(axis.text.x = element_text(angle = 30, hjust = 1))